# Discretion in Clinical Decision Making: Evidence from Bunching
# Claire Boone
# Longitudinal patient-level analyses
# Creates appendix tables: A19, A20, A21, A22, A27


# packages and server -----------------
  rm(list = ls()) 
  library(tidyverse)
  library(magrittr)
  library(bunching)
  library(magrittr)
  library(dplyr)
  library(coefplot)
  library(lfe)
  library(outreg)
  library(stargazer)
  library(scales)
  library(gplots)
  library(expss)
  library(RColorBrewer)
  library(here)

  proj_dir <- here::here()
  raw_data <- file.path(proj_dir, "raw_data/")
  gen_data <- file.path(proj_dir, "gen_data/")
  out <- file.path(proj_dir, "out/")

# load data --------------------
  
  load(paste0(gen_data,"included_ids.Rda"))
  load(paste0(gen_data,"ehr_cleaned.Rda"))  

  
# determine patients' last visit in the EHR data -----------  
  ld <- dplyr::left_join(hdi, hd, by = "id")
  rm(hdi)
  rm(hd)
  
  ld %<>%  dplyr::arrange(id, fecha_atencion) %<>% dplyr::group_by(id) %<>% 
    dplyr::mutate(visit_n = row_number()) %<>% ungroup()
  ld %<>%  dplyr::group_by(id) %<>% 
    dplyr::mutate(visit_max = max(visit_n)) %<>% ungroup()
  ld %<>%  dplyr::group_by(id) %<>% 
    dplyr::mutate(visit_max = ifelse(visit_n == visit_max, 1, 0)) %<>% ungroup()
  ld %<>% dplyr::mutate(visit_max_date = ifelse(visit_max==1, fecha_atencion, 1900-01-01)) 
  ld %<>%  dplyr::group_by(id) %<>% 
    dplyr::mutate(visit_max_date = max(visit_max_date)) %<>% ungroup()
  ld$visit_max_date <- as.Date(ld$visit_max_date, origin = "1970-01-01")
  ld %<>%  dplyr::group_by(id) %<>% 
    dplyr::mutate(visit_max = max(visit_n)) %<>% ungroup()
  
  ld %<>% dplyr::filter(visit_n==1)
  ld %<>% dplyr::select(id, visit_max_date, visit_max)
  
  
# do the same with hospitalization data ---------------
  load(paste0(gen_data,"hospital_cleaned.Rda"))
  load(paste0(gen_data,"primary_hospital_cleaned_included.Rda"))
  
  hdd <- hd
  hdd %<>% dplyr::select(id, fecha_atencion)
  rm(hd)
  
  ed <- dplyr::left_join(hdd, ed, by='id') 
 
  ed %<>% dplyr::mutate(time_to_hosp = as.numeric(fecha_egr - fecha_atencion))

  ed %<>% dplyr::filter(time_to_hosp>=0)
  ed %<>% dplyr::select(id, fecha_egr, death)
  
  ed %<>% dplyr::arrange(id, fecha_egr) %<>% dplyr::group_by(id) %<>% 
    dplyr::mutate(hosp_n = row_number()) %<>% ungroup()
  ed %<>%  dplyr::group_by(id) %<>% 
    dplyr::mutate(hosp_max = max(hosp_n)) %<>% ungroup()
  ed %<>%  dplyr::group_by(id) %<>% 
    dplyr::mutate(hosp_max = ifelse(hosp_n == hosp_max, 1, 0)) %<>% ungroup()
  ed %<>% dplyr::mutate(hosp_max_date = ifelse(hosp_max==1, fecha_egr, 1900-01-01)) 
  ed %<>%  dplyr::group_by(id) %<>% 
    dplyr::mutate(hosp_max_date = max(hosp_max_date)) %<>% ungroup()
  ed$hosp_max_date <- as.Date(ed$hosp_max_date, origin = "1970-01-01")
  ed %<>%  dplyr::group_by(id) %<>% 
    dplyr::mutate(hosp_max = max(hosp_n)) %<>% ungroup()
  
  ed %<>% dplyr::mutate(date_death_hosp = ifelse(death==1, fecha_egr, NA))
  ed %<>%  dplyr::group_by(id) %<>% 
    dplyr::mutate(date_death_hosp = max(date_death_hosp, na.rm=T)) %<>% ungroup() 
  ed$date_death_hosp <- as.Date(ed$date_death_hosp, origin = "1970-01-01")
  
  ed %<>% dplyr::filter(hosp_n==1)
  ed %<>% dplyr::select(id, hosp_max_date, date_death_hosp)
  
  rm(hdd)
  
 
  
# join with EHR data  ----------
  hd <- dplyr::left_join(hd, ld, by="id")  
  hd <- dplyr::left_join(hd, ed, by="id")  

  length(unique(hd$id)) 
  
  rm(ld)
  rm(ed)

  hd %<>% dplyr::mutate(visit_fu_time = as.numeric(visit_max_date - fecha_atencion))
  hd %<>% dplyr::mutate(hosp_fu_time = as.numeric(hosp_max_date - fecha_atencion))
  hd %<>% dplyr::mutate(death_fu_time = as.numeric(date_death_hosp - fecha_atencion))
  hd %<>% dplyr::mutate(death_fu_time = ifelse(is.infinite(death_fu_time), NA, death_fu_time))
  
  hd %<>% dplyr::rowwise() %<>% 
    dplyr::mutate(max_fu_time = max(c(visit_fu_time, hosp_fu_time, death_fu_time), na.rm=T))
  
  hd %<>% dplyr::mutate(alive30 = ifelse(max_fu_time>=30, 1, 0))
  hd %<>% dplyr::mutate(alive90 = ifelse(max_fu_time>=90, 1, 0))
  hd %<>% dplyr::mutate(alive180 = ifelse(max_fu_time>=180, 1, 0))
  hd %<>% dplyr::mutate(alive365 = ifelse(max_fu_time>=365, 1, 0))
  
# (TAB A19) -----  
  summary(hd$alive30)
  summary(hd$alive90)
  summary(hd$alive180)
  summary(hd$alive365)

# (TABS A20, A21, A22) ---------   
  # functions:
  mean1 <- function(x) {
    meantest <- round(mean(hd[[paste0(x)]], na.rm=T),3)
  }
  clinics1 <- function(x) {
    if (any(!is.na(hd[[paste0(x)]]))) {
      clinictest <- length(unique(hd[["cod_est"]]))
      return(clinictest) 
    } else {
      return(NA) 
    }
  }
  
  outcomes <- c('alive90', 'alive180', 'alive365')
  
  meandf <- as.character(lapply(outcomes, mean1))
  clinicdf <- as.character(lapply(outcomes, clinics1))
  
  reglist1 <- lapply(outcomes, function(x) {
    form <- as.formula(paste0(x, " ~ sig_neg_bunch_mag*under140   | year + quarter   + male + age_round  | 0 | cod_est"))
    felm(form, data = hd, exactDOF=TRUE)
  })
  
  stargazer(reglist1, title="Impact of Bunching on Time Patient is Observed in the Data", keep.stat=c("n"),
            add.lines=list(c("Mean dep. var.", meandf), c("Clinics", clinicdf)), 
            dep.var.caption = "", table.layout ="-d#-t-sna-", label = "tab:appendix_alivetime",
            dep.var.labels= c('\\textbf{Retained 3 mo.}', '\\textbf{Retained 6 mo.}', '\\textbf{Retained 12 mo.}'),
            omit.table.layout = "n", font.size = "small",
            order=c("sig_neg_bunch_mag:under140", "sig_neg_bunch_mag", "under140"),
            covariate.labels=c("\\textbf{Bunch x BP$<$140}", "Bunch","BP$<$140"),
            out = paste0(leaf, "results_alive_time.tex"))
  

  
  # hospitalization outcomes:
  hd$cv_stroke_90_h <- hd$cv_stroke_90*100
  hd$cv_acs_90_h <- hd$cv_acs_90 *100
  hd$cv_chf_90_h <- hd$cv_chf_90 *100
  hd$cv_stroke_180_h <- hd$cv_stroke_180*100
  hd$cv_acs_180_h <- hd$cv_acs_180 *100
  hd$cv_chf_180_h <- hd$cv_chf_180 *100
  hd$cv_stroke_365_h <- hd$cv_stroke_365*100
  hd$cv_acs_365_h <- hd$cv_acs_365 *100
  hd$cv_chf_365_h <- hd$cv_chf_365 *100
  
  # within each group:
  
  ## 3 months
    hd %<>% dplyr::ungroup()
    dd <- hd
    dd %<>% dplyr::filter(alive90==1)
    
    outcomes <- c( 'cv_stroke_90_h', 'cv_acs_90_h', 'cv_chf_90_h')
    
    meandd <- function(x) {
      meantest <- round(mean(dd[[paste0(x)]], na.rm=T),3)
    }
    clinicsdd <- function(x) {
      if(!is.na(dd[[paste0(x)]])) {
        clinictest <- length(unique(dd[["cod_est"]]))
      }
    }
    meandd <- as.character(lapply(outcomes, meandd))
    clinicdd <- as.character(lapply(outcomes, clinicsdd))
    
    reglist1 <- lapply(outcomes, function(x) {
      form <- as.formula(paste0(x, " ~ sig_neg_bunch_mag*under140   | year + quarter   + male + age_round  | 0 | cod_est"))
      felm(form, data = dd, exactDOF=TRUE)
    })
    
  ## 6 months
    de <- hd
    de %<>% dplyr::filter(alive180==1)
    
    outcomes <- c( 'cv_stroke_180_h', 'cv_acs_180_h', 'cv_chf_180_h')
  
    meande <- function(x) {
      meantest <- round(mean(de[[paste0(x)]], na.rm=T),3)
    }
    clinicsde <- function(x) {
      if(!is.na(dd[[paste0(x)]])) {
        clinictest <- length(unique(de[["cod_est"]]))
      }
    }
    meande <- as.character(lapply(outcomes, meande))
    clinicde <- as.character(lapply(outcomes, clinicsde))
    
    reglist2 <- lapply(outcomes, function(x) {
      form <- as.formula(paste0(x, " ~ sig_neg_bunch_mag*under140   | year + quarter   + male + age_round  | 0 | cod_est"))
      felm(form, data = de, exactDOF=TRUE)
    })  
    
    ## 12 months
    df <- hd
    df %<>% dplyr::filter(alive365==1)
    
    outcomes <- c( 'cv_stroke_365_h', 'cv_acs_365_h', 'cv_chf_365_h')
    
    meandf <- function(x) {
      meantest <- round(mean(df[[paste0(x)]], na.rm=T),3)
    }
    clinicsdf <- function(x) {
      if(!is.na(df[[paste0(x)]])) {
        clinictest <- length(unique(df[["cod_est"]]))
      }
    }
    meandf <- as.character(lapply(outcomes, meandf))
    clinicdf <- as.character(lapply(outcomes, clinicsdf))
    
    reglist3 <- lapply(outcomes, function(x) {
      form <- as.formula(paste0(x, " ~ sig_neg_bunch_mag*under140   | year + quarter   + male + age_round  | 0 | cod_est"))
      felm(form, data = df, exactDOF=TRUE)
    })
  
  
  reglist1[[length(reglist1) + 1]] <- reglist2
  reglist1[[length(reglist1) + 1]] <- reglist3
  meandf <- cbind(meandd, meande, meandf)
  clinicdf <- cbind(clinicdd, clinicde, clinicdf)
  
  stargazer(reglist1, title="Impact of Bunching on Cardiovascular Hospitalization 3, 6, and 12 Months After Primary Care Visit, Among Retained Patients", keep.stat=c("n"),
            add.lines=list(c("Mean dep. var.(\\%)", meandf), c("Clinics", clinicdf)), 
            dep.var.caption = "", table.layout ="-d#-t-sna-", label = "tab:appendix_alive_cv",
            omit.table.layout = "n", font.size = "small",
            dep.var.labels= c('Stroke', 'Heart attack', 'CHF', 'Stroke', 'Heart attack', 'CHF', 'Stroke', 'Heart attack', 'CHF'),
            order=c("sig_neg_bunch_mag:under140", "sig_neg_bunch_mag", "under140"),
            covariate.labels=c("\\textbf{Bunch x BP$<$140}", "Bunch","BP$<$140"),
            out = paste0(leaf, "results_cv_conditional_alive.tex"))
  
    
  # hospitalization outcomes:
  
  hd$any_hosp_90_h <-  hd$any_hosp_90 *100
  hd$any_hosp_180_h <- hd$any_hosp_180 *100
  hd$any_hosp_365_h <- hd$any_hosp_365 *100
  
  ## 3 months
  dd <- hd
  dd %<>% dplyr::filter(alive90==1)
  
  outcomes <- c( 'any_hosp_90_h')
  
  meandd <- function(x) {
    meantest <- round(mean(dd[[paste0(x)]], na.rm=T),3)
  }
  clinicsdd <- function(x) {
    if(!is.na(dd[[paste0(x)]])) {
      clinictest <- length(unique(dd[["cod_est"]]))
    }
  }
  meandd <- as.character(lapply(outcomes, meandd))
  clinicdd <- as.character(lapply(outcomes, clinicsdd))
  
  reglist1 <- lapply(outcomes, function(x) {
    form <- as.formula(paste0(x, " ~ sig_neg_bunch_mag*under140   | year + quarter   + male + age_round  | 0 | cod_est"))
    felm(form, data = dd, exactDOF=TRUE)
  })
  
  ## 6 months
  de <- hd
  de %<>% dplyr::filter(alive180==1)
  
  outcomes <- c( 'any_hosp_180_h')
  
  meande <- function(x) {
    meantest <- round(mean(de[[paste0(x)]], na.rm=T),3)
  }
  clinicsde <- function(x) {
    if(!is.na(dd[[paste0(x)]])) {
      clinictest <- length(unique(de[["cod_est"]]))
    }
  }
  meande <- as.character(lapply(outcomes, meande))
  clinicde <- as.character(lapply(outcomes, clinicsde))
  
  reglist2 <- lapply(outcomes, function(x) {
    form <- as.formula(paste0(x, " ~ sig_neg_bunch_mag*under140   | year + quarter   + male + age_round  | 0 | cod_est"))
    felm(form, data = de, exactDOF=TRUE)
  })  
  
  
  
  ## 12 months
  df <- hd
  df %<>% dplyr::filter(alive365==1)
  
  outcomes <- c('any_hosp_365_h')
  
  meandf <- function(x) {
    meantest <- round(mean(df[[paste0(x)]], na.rm=T),3)
  }
  clinicsdf <- function(x) {
    if(!is.na(df[[paste0(x)]])) {
      clinictest <- length(unique(df[["cod_est"]]))
    }
  }
  meandf <- as.character(lapply(outcomes, meandf))
  clinicdf <- as.character(lapply(outcomes, clinicsdf))
  
  reglist3 <- lapply(outcomes, function(x) {
    form <- as.formula(paste0(x, " ~ sig_neg_bunch_mag*under140   | year + quarter   + male + age_round  | 0 | cod_est"))
    felm(form, data = df, exactDOF=TRUE)
  })
  
  
  reglist1[[length(reglist1) + 1]] <- reglist2
  reglist1[[length(reglist1) + 1]] <- reglist3
  meandf <- cbind(meandd, meande, meandf)
  clinicdf <- cbind(clinicdd, clinicde, clinicdf)
  
  stargazer(reglist1, title="Impact of Bunching on Any Hospitalization 3, 6, and 12 Months After Primary Care Visit, Among Retained Patients", keep.stat=c("n"),
            add.lines=list(c("Mean dep. var.(\\%)", meandf), c("Clinics", clinicdf)), 
            dep.var.caption = "", table.layout ="-d#-t-sna-", label = "tab:appendix_alive_allcause",
            omit.table.layout = "n", font.size = "small",
            dep.var.labels= c('Hosp $<$3 months', 'Hosp $<$6 months', 'Hosp $<$12 months'),
            order=c("sig_neg_bunch_mag:under140", "sig_neg_bunch_mag", "under140"),
            covariate.labels=c("\\textbf{Bunch x BP$<$140}", "Bunch","BP$<$140"),
            out = paste0(leaf, "results_any_conditional_alive.tex"))
  
  
  
# (TAB A27)  -----------------------------
  
  load(paste0(gen_data,"ehr_cleaned_analysis_sample.Rda"))
  load(paste0(gen_data, "included_ids.Rda"))
  
  hd <- dplyr::left_join(hdi, hd, by = "id")
  
  hd %<>% dplyr::mutate(days_first_to_diag = as.numeric(date_diag_htn - fecha_atencion))
  
  hd %<>%
    dplyr::group_by(id) %>%
    dplyr::arrange(id, rows_n) %>%
    dplyr::mutate(
      days_to_next_vis = as.numeric(dplyr::lead(fecha_atencion) - fecha_atencion),
      last_visit       = as.integer(fecha_atencion == max(fecha_atencion)),
      first_to_last    = as.numeric(max(fecha_atencion) - min(fecha_atencion))
    ) %>%
    dplyr::ungroup()
  

  counts_tag1 <- hd %>%
    dplyr::filter(tag1 == 1) %>%
    dplyr::group_by(sig_neg_bunch) %>%
    dplyr::summarise(
      patients = dplyr::n_distinct(id),
      visits   = dplyr::n(),
      .groups  = "drop"
    )
  
  counts_tag2 <- hd %>%
    dplyr::filter(tag2 == 1) %>%
    dplyr::group_by(sig_neg_bunch) %>%
    dplyr::summarise(
      patients = dplyr::n_distinct(id),
      visits   = dplyr::n(),
      .groups  = "drop"
    )
  
  readr::write_csv(counts_tag1, file.path(out, "sum_by_tag1.csv"))
  readr::write_csv(counts_tag2, file.path(out, "sum_by_tag2.csv"))
  
  sum_tag2 <- hd %>%
    dplyr::filter(tag2 == 1) %>%
    dplyr::group_by(id) %>%
    dplyr::arrange(rows_n) %>%
    dplyr::mutate(
      rows_to_diag = dplyr::if_else(date_diag_htn == fecha_atencion,
                                    rows_n - 1L, NA_integer_)
    ) %>%
    tidyr::fill(rows_to_diag, .direction = "downup") %>%
    dplyr::filter(rows_n == 1) %>%
    dplyr::mutate(diag_next_visit = as.integer(rows_to_diag == 1L)) %>%
    dplyr::ungroup()
  
  sum_tag2_agg <- sum_tag2 %>%
    dplyr::group_by(sig_neg_bunch) %>%
    dplyr::summarise(
      mean_fu_visits_to_diag = mean(rows_to_diag, na.rm = TRUE),
      mean_days_to_diag      = mean(first_to_last, na.rm = TRUE),
      diag_next_visit        = mean(diag_next_visit, na.rm = TRUE),
      .groups = "drop"
    )
  
  readr::write_csv(sum_tag2_agg, file.path(out, "sum_by_tag2_baseline.csv"))
  
  
  
  